---
title: "Replicating tables and figures for “Income inequality is unrelated to perceived inequality and support for redistribution” by Kris-Stella Trump, forthcoming in Social Science Quarterly"
output: 
  pdf_document:
    keep_tex: true
---



```{r estimates of missingness reported in text, include=F}
# R version 4.2.2 

library(Hmisc)
library(weights)
library(tidyverse)
library(foreign)


#Load main data
issp <- read.dta("ZA5890_v1-0-0.dta", convert.factors = FALSE)
names(issp)
#To keep informative country-year names, read in country-year variable as factor
issp_factor  <- read.dta("ZA5890_v1-0-0.dta", convert.factors = TRUE)
issp$V7 <- issp_factor$V7
#also use this for country variable
issp$country <- issp_factor$V5
#and make sure year is labelled clearly
issp$year <- issp$V4

##### Also load add-on dataset with income perceptions and preferences
#Data citation: 
#ISSP Research Group (2014): International Social Survey Programme: Social Inequality I-IV ADD ON - ISSP 1987-1992-1999-2009. GESIS Data Archive, Cologne. ZA5891 Data file Version 1.1.0, doi:10.4232/1.12041
addon <- read.dta("ZA5891_v1-1-0.dta")
names(addon)

#V8 - Actually earn doctor
#V9 - Actually earn CEO
#V11 - Actually earn: Unskilled worker in a factory
#V12 - actually earn cabinet minister
#V23 Should earn doctor
#V24: Should earn: CEO
#V26 Should earn: unskilled
#V27 Should earn: cabinet minister


# What share of CEO guesses missing
missingness <- addon %>% 
  group_by(V7) %>%
  summarise(n_obs = n(),
            missing_rate = sum(is.na(V9))/n()
            ) %>% 
  arrange(.,desc(missing_rate)
          ) %>%
  filter(missing_rate<1) #remove observations where the item was not asked
#View(missingness)
summary(missingness$missing_rate)
sd(missingness$missing_rate)
sum(missingness$missing_rate>0.3)

# What share of worker guesses missing
missingness_f <- addon %>% 
  group_by(V7) %>%
  summarise(n_obs = n(),
            missing_rate = sum(is.na(V11))/n()
  ) %>% 
  arrange(.,desc(missing_rate)
  ) %>%
  filter(missing_rate<1) #remove observations where the item was not asked
#View(missingness_f)
summary(missingness_f$missing_rate)
sd(missingness_f$missing_rate)
sum(missingness_f$missing_rate>0.3)
``` 


```{r setup for regressions, include=FALSE}

knitr::opts_chunk$set(echo = TRUE)
## setup 
library(haven)
library(tidyverse)
library(margins)
library(lme4)
library(mice)
library(mitools)
library(plotrix)
library(dotwhisker)
library(broom)
library(Amelia)
library(performance)
library(mi)
library(mitml)
library(RColorBrewer)
library(kableExtra)

load("issp_swiid.RData")

# recode Gini to be 0-1 for ease of interpreting coefficients
issp_swiid <- issp_swiid %>%
  map(. %>%
    mutate(gini_mkt = gini_mkt/100,
           gini_disp = gini_disp/100)
  )
```


## Figure 1

```{r figure prep and export, echo=F, warning=F, include=F}
#Create a summary dataset for plotting country means
issp_mean <- issp_swiid %>%
  map(. %>%
        group_by(country, year) %>%
        summarise(gini_mkt_mean = mean(gini_mkt),
                  gini_disp_mean = mean(gini_disp),
                  abs_red_mean = mean(abs_red),
                  toohigh_mean = mean(toohigh),
                  reduce_mean = mean(reduce),
                  perc_log_mean = mean(perc_log),
                  pref_log_mean = mean(pref_log),
                  wealthy_family_mean = mean(getahead_wealthy)
        )
  )
issp_se <- issp_swiid %>%
  map(. %>%
        group_by(country, year) %>%
        summarise(gini_mkt_sd = std.error(gini_mkt),
                  gini_disp_sd = std.error(gini_disp),
                  abs_red_sd = std.error(abs_red),
                  toohigh_sd = std.error(V32),
                  reduce_sd = std.error(reduce),
                  perc_log_sd = std.error(perc_log),
                  pref_log_sd = std.error(pref_log),
                  wealthy_family_sd = std.error(getahead_wealthy)
        )
  )

#Combine the estimates with Rubin"s rules
issp_names <- tibble(issp_mean[[1]][,1], issp_mean[[1]][,2])
issp_melded <- tibble(gini_mkt_mean = numeric(0), gini_disp_mean = numeric(0), abs_red_mean = numeric(0), toohigh_mean  = numeric(0), reduce_mean = numeric(0), perc_log_mean = numeric(0), pref_log_mean = numeric(0), wealthy_family_mean = numeric(0), gini_mkt_se = numeric(0), gini_disp_se = numeric(0), abs_red_se = numeric(0), toohigh_se  = numeric(0), reduce_se = numeric(0), perc_log_se = numeric(0), pref_log_se = numeric(0), wealthy_family_se = numeric(0),)
for (i in 1:nrow(issp_mean[[1]])) {
  q <- rbind(as.numeric(c(issp_mean[[1]][i,c(3:10)])),
             as.numeric(c(issp_mean[[2]][i,c(3:10)])),
             as.numeric(c(issp_mean[[3]][i,c(3:10)])),
             as.numeric(c(issp_mean[[4]][i,c(3:10)])),
             as.numeric(c(issp_mean[[5]][i,c(3:10)]))
  )
  se <- rbind(as.numeric(c(issp_se[[1]][i,c(3:10)])),
              as.numeric(c(issp_se[[2]][i,c(3:10)])),
              as.numeric(c(issp_se[[3]][i,c(3:10)])),
              as.numeric(c(issp_se[[4]][i,c(3:10)])),
              as.numeric(c(issp_se[[5]][i,c(3:10)]))
  )
  combined <- mi.meld(q = q, se= se, byrow=T)
  issp_melded[i, c(1:8)] <- combined$q.mi
  issp_melded[i, c(9:16)] <- combined$se.mi
}
issp_means <- bind_cols(issp_names, issp_melded)
rm(issp_mean, issp_se, issp_melded, issp_names, q, se, i, combined)

figure_gini_perc <- ggplot(issp_means, aes(x=gini_mkt_mean, y=perc_log_mean)) +
  geom_point() + 
  #scale_shape_manual(values=c(21,22,23,24,21,22,23,24,21,22,23,24,21,22,23,24,21,22,23,24,21,22,23,24,21,22,23)) +
  #scale_color_manual(values = MyColours) +
  #scale_fill_manual(values = MyColours) +
  geom_errorbar(aes(ymin=perc_log_mean-1.96*perc_log_se, ymax=perc_log_mean+1.96*perc_log_se)) +
  ylab("Country means:\nPerceived income\nratio (log)") + 
  xlab("Gini coefficient (market)") + 
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5)) 
ggsave(
  "ineq_figure.png",
  figure_gini_perc,
  dpi = 400
)
```


\begin{figure}[hbt!]
    \centering
    \includegraphics[width=0.8\textwidth]{ineq_figure.png}
    \caption{Country-year means of perceived CEO-worker income ratios plotted against market inequality.}
    \label{fig:ineq_figure}
\end{figure}

## Regression tables

```{r developing and validating regression approach, include=F}
#this section tests model fit focusing on a regression of key interest: are perceptions influenced by market inequality

#list out standard control variables - this is not fully automated, just for reference
#try two sets of controls, based on informal DAGs first is theoretically better, less risk of confounding
controls1 <- paste(c("female", "AGE", "DEGREE", "ATTEND", "income.quintile", "V66", "union", "married"), collapse = " + ")
controls2 <- paste(c("female", "AGE", "DEGREE", "ATTEND", "income.quintile", "V66", "union", "married", "VOTE_LR", "TOPBOT"), collapse = " + ")

#Compare model fits with different specifications and control variables
perc.mkt.toobasic <- issp_swiid %>%
  map(~ lm(perc_log ~ gini_mkt, weights = WEIGHT, data=.))
testEstimates(perc.mkt.toobasic)

perc.mkt.basic <- issp_swiid %>%
  map(~ lmer(perc_log ~ gini_mkt + (1 | country/year), weights = WEIGHT, data=., REML=F))
testEstimates(perc.mkt.basic)

perc.mkt.ctrls <- issp_swiid %>%
  map(~ lmer(perc_log ~ gini_mkt + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))
testEstimates(perc.mkt.ctrls)

perc.mkt.ctrls.full <- issp_swiid %>%
  map(~ lmer(perc_log ~ gini_mkt + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + VOTE_LR + TOPBOT + (1 | country/year), weights=WEIGHT, data=.))
testEstimates(perc.mkt.ctrls.full)

#test fit
AIC(perc.mkt.toobasic[[1]],perc.mkt.basic[[1]],perc.mkt.ctrls[[1]],perc.mkt.ctrls.full[[1]])
#multilevel is a clear improvement on nothing
#including controls is a slight improvement but most action remains in the country terms based on smaller reduction in AIC
# last two controls a v small improvement, and questionable based on DAGS - will not use

#test for singularity
check_singularity(perc.mkt.ctrls[[1]])
#passes

check_model(perc.mkt.ctrls[[1]])
#looks fine

``` 

```{r table 1 prep, results='asis', echo=F, include=F}
# perception regressions 
perc.mkt.ctrls <- issp_swiid %>%
  map(~ lmer(perc_log ~ gini_mkt + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

perc.disp.ctrls <- issp_swiid %>%
  map(~ lmer(perc_log ~ gini_disp  + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

# Table 1 - perceived inequality as a function of real inequality
perc.ineq.table <- tibble(' ' = character(0) , 'Model 1' = numeric(0), 'Model 2' = numeric(0))

perc.ineq.table[1,] <- list('Gini (market)', testEstimates(perc.mkt.ctrls)$estimates["gini_mkt","Estimate"], NA)
perc.ineq.table[2,] <- list(' ', testEstimates(perc.mkt.ctrls)$estimates["gini_mkt","Std.Error"], NA)
perc.ineq.table[3,] <- list('Gini (disposable)', NA, testEstimates(perc.disp.ctrls)$estimates["gini_disp","Estimate"])
perc.ineq.table[4,] <- list(' ', NA, testEstimates(perc.disp.ctrls)$estimates["gini_disp","Std.Error"])
perc.ineq.table <- kable(perc.ineq.table, digits=2)
perc.ineq.table
#This gives the numbers; final table is manually adjusted for style and included below
```


\begin{table}
\caption{\label{tab:perc_ineq}Perceived inequality as a function of actual inequality.}
\centering
\begin{tabular}{l|c|c}
\hline
\multicolumn{3}{c}{DV: Perceived income ratio (log)} \\
\hline
  & Model 1 & Model 2\\
\hline
Gini (market, 0-1) & 0.12 & \\
 & (0.28) & \\
\hline
Gini (disposable, 0-1) &  & 0.35\\
 &  & (0.48) \\
\hline
Country-year fixed effects & Y & Y \\
Country fixed effects & Y & Y \\
Demographic controls & Y & Y \\ 
\hline
\multicolumn{3}{l}{*p<0.05, **p<0.01, ***p<0.001} \\
\multicolumn{3}{l}{Results from multilevel linear models fitted on multiply imputed data.}
\end{tabular}
\end{table}


```{r Table 2 prep, echo=F, include=F}
#Table 2 - preferred ratio as function of inequality and perceptions

pref.mkt.ctrls <- issp_swiid %>%
  map(~ lmer(pref_log ~ gini_mkt + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

pref.disp.ctrls <- issp_swiid %>%
  map(~ lmer(pref_log ~ gini_disp + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

pref.mkt.ctrls.perc <- issp_swiid %>%
  map(~ lmer(pref_log ~ gini_mkt + perc_log + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

pref.disp.ctrls.perc <- issp_swiid %>%
  map(~ lmer(pref_log ~ gini_disp + perc_log + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

pref.ineq.table <- tibble(' ' = character(0) , 'Model 1' = numeric(0), 'Model 2' = numeric(0), 'Model 3' = numeric(0), 'Model 4' = numeric(0))

pref.ineq.table[1,] <- list('Gini (market)', testEstimates(pref.mkt.ctrls)$estimates["gini_mkt","Estimate"], testEstimates(pref.mkt.ctrls.perc)$estimates["gini_mkt","Estimate"], NA , NA)
pref.ineq.table[2,] <- list(' ', testEstimates(pref.mkt.ctrls)$estimates["gini_mkt","Std.Error"], testEstimates(pref.mkt.ctrls.perc)$estimates["gini_mkt","Std.Error"], NA , NA)

pref.ineq.table[3,] <- list('Gini (disposable)', NA,NA, testEstimates(pref.disp.ctrls)$estimates["gini_disp","Estimate"], testEstimates(pref.disp.ctrls.perc)$estimates["gini_disp","Estimate"])
pref.ineq.table[4,] <- list(' ', NA,NA, testEstimates(pref.disp.ctrls)$estimates["gini_disp","Std.Error"], testEstimates(pref.disp.ctrls.perc)$estimates["gini_disp","Std.Error"])

pref.ineq.table[5,] <- list('Perceived pay ratio (log)', NA, testEstimates(pref.mkt.ctrls.perc)$estimates["perc_log","Estimate"], NA, testEstimates(pref.disp.ctrls.perc)$estimates["perc_log","Estimate"])
pref.ineq.table[6,] <- list(' ', NA, testEstimates(pref.mkt.ctrls.perc)$estimates["perc_log","Std.Error"], NA, testEstimates(pref.disp.ctrls.perc)$estimates["perc_log","Std.Error"])

pref.ineq.table<- kable(pref.ineq.table, digits=2)
pref.ineq.table

#This gives the numbers; final table is adjusted for style manually and included below
```

\begin{table}
\caption{\label{tab:pref_ineq}Preferred inequality as a function of perceived and actual inequality.}
\centering
\begin{tabular}{l|c|c|c|c}
\hline
\multicolumn{5}{c}{DV: Preferred income ratio (log)} \\
\hline
  & Model 1 & Model 2 & Model 3 & Model 4\\
\hline
Gini (market, 0-1) & 0.25 & -0.01 &  & \\
 & (0.28) & (0.25) & &\\
\hline
Gini (disposable, 0-1) & & & 0.42 & 0.04\\
 & & & (0.40) & (0.28)\\
\hline
Perceived pay ratio (log) &  & 0.50*** &  & 0.50*** \\
 &  & (0.00) &  & (0.00)\\
\hline
Country-year fixed effects & Y & Y & Y & Y \\
Country fixed effects & Y & Y & Y & Y \\
Demographic controls & Y & Y & Y & Y \\ 
\hline
\multicolumn{5}{l}{*p<0.05, **p<0.01, ***p<0.001}\\
\multicolumn{5}{l}{Results from multilevel linear models fitted on multiply imputed data.}
\end{tabular}
\end{table}


```{r Table 3 prep, echo=F, include=F}
toohigh.disp.ctrls.perc <- issp_swiid %>%
  map(~ lmer(V32 ~ gini_disp + perc_log + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

reduce.disp.ctrls.perc <- issp_swiid %>%
  map(~ lmer(reduce ~ gini_disp + perc_log + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))

taxrich.disp.ctrls.perc <- issp_swiid %>%
  map(~ lmer(as.numeric(V39) ~ gini_disp + perc_log + female + AGE + DEGREE + ATTEND + income.quintile + V66 + union + married + (1 | country/year), weights=WEIGHT, data=.))


# Table 3 - support for redistr as function of disposable inequality and perceptions
redistr.ineq.table <- tibble(' ' = character(0) , 'Too high' = numeric(0), "Reduce diff's"= numeric(0), "Tax high incomes" = numeric(0))

redistr.ineq.table[1,] <- list('Gini (disposable)', testEstimates(toohigh.disp.ctrls.perc)$estimates["gini_disp","Estimate"], testEstimates(reduce.disp.ctrls.perc)$estimates["gini_disp","Estimate"], testEstimates(taxrich.disp.ctrls.perc)$estimates["gini_disp","Estimate"])
redistr.ineq.table[2,] <- list(' ', testEstimates(toohigh.disp.ctrls.perc)$estimates["gini_disp","Std.Error"], testEstimates(reduce.disp.ctrls.perc)$estimates["gini_disp","Std.Error"], testEstimates(taxrich.disp.ctrls.perc)$estimates["gini_disp","Std.Error"])
redistr.ineq.table[3,] <- list('Perceived pay ratio (log)', testEstimates(toohigh.disp.ctrls.perc)$estimates["perc_log","Estimate"], testEstimates(reduce.disp.ctrls.perc)$estimates["perc_log","Estimate"], testEstimates(taxrich.disp.ctrls.perc)$estimates["perc_log","Estimate"])
redistr.ineq.table[4,] <- list(' ', testEstimates(toohigh.disp.ctrls.perc)$estimates["perc_log","Std.Error"], testEstimates(reduce.disp.ctrls.perc)$estimates["perc_log","Std.Error"], testEstimates(taxrich.disp.ctrls.perc)$estimates["perc_log","Std.Error"])
redistr.ineq.table <- kable(redistr.ineq.table, "latex", digits=2)
redistr.ineq.table

#This gives the numbers; final table is manually adjusted for style and included below

```


\begin{table}
\caption{\label{tab:redist_ineq}Normative attitudes toward redistribution and inequality.}
\centering
\begin{tabular}{l|c|c|c}
\hline
\multicolumn{4}{c}{Dependent variables:} \\
\hline
  & Inequality too high & Reduce diff's in income & Tax high incomes \\
\hline
Gini (disposable, 0-1) & -0.42 & 0.67 & -0.09 \\
 & (0.40) & (0.53) & (0.32) \\
\hline
Perceived pay ratio (log) & -0.07*** & 0.01* & -0.04*** \\
 & (0.00) & (0.00) & (0.00) \\
\hline
Country-year fixed effects & Y & Y & Y  \\
Country fixed effects & Y & Y & Y  \\
Demographic controls & Y & Y & Y  \\ 
\hline
\multicolumn{4}{l}{*p<0.05, **p<0.01, ***p<0.001}\\
\multicolumn{4}{l}{Results from multilevel linear models fitted on multiply imputed data.}
\end{tabular}
\end{table}



## Showing calculation for in-text interpretation

If significant, it would imply that a person who lives in the most equal country-year in the dataset and perceives a 10:1 CEO to factory worker income ratio there would, if moved to the most unequal country-year, start perceiving a `r 10^(1+0.35*(0.48-0.18))`:1 CEO to factor worker income ratio. 

